library(tidyverse)
library(janitor)
library(purrr)
library(scrutiny)
library(readxl)
library(knitr)
library(kableExtra)
# Format numbers with a fixed number of decimal places (default 2),
# returning character strings, e.g. 17.5 -> "17.50".
min_decimals <- function(x, digits = 2) {
  fmt <- paste0("%.", digits, "f")
  sprintf(fmt, x)
}
# mean <- 17.50
# sd <- 12.40
# n_obs <- 35
#
# min_val <- 0
# max_val <- 63
# tides <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
#
# if(is.null(sd_prec)){
# sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
# }
#
# result <- c(-Inf, Inf)
#
# aMax <- min_val
# aMin <- floor(mean*n_items)/n_items
# bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val) # Adjusted here
# bMin <- min(aMin + 1/n_items, max_val) # Adjusted here
# total <- round(mean * n_obs * n_items)/n_items
#
# poss_values <- max_val
# for(i in seq_len(n_items)){
# poss_values <- c(poss_values, min_val:(max_val-1) + (1 / n_items) * (i - 1))
# }
# poss_values <- sort(poss_values)
#
# for(abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))){
#
# a <- abm[1]
# b <- abm[2]
# m <- abm[3]
#
# # Adjust a and b to be within min_val and max_val
# a <- min(max(a, min_val), max_val)
# b <- min(max(b, min_val), max_val)
#
# if(a == b){
# vec <- rep(a, n_obs)
# } else {
# k <- round((total - (n_obs * b)) / (a - b))
# k <- min(max(k, 1), n_obs - 1)
# vec <- c(rep(a, k), rep(b, n_obs - k))
# diff <- sum(vec) - total
#
# if ((diff < 0)) {
# vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n_obs - k))
# } else if ((diff > 0)) {
# vec <- c(rep(a, k), b - diff, rep(b, n_obs - k - 1))
# }
# }
#
# # Check if the calculated mean and values match expected conditions
# if(round(mean(vec), sd_prec) == round(mean, sd_prec) &
# all(floor(vec*10e9) %in% floor(poss_values*10e9))){
# result[m] <- round(sd(vec), sd_prec)
# }
#
# }
#
# # Replace Inf or -Inf with NA
# result[is.infinite(result)] <- NA
#
# res <-
# data.frame(sd_min = result[1],
# sd_max = result[2]) |>
# mutate(tides = case_when(sd < sd_min ~ FALSE,
# is.na(sd_min) ~ FALSE,
# sd > sd_max ~ FALSE,
# is.na(sd_max) ~ FALSE,
# TRUE ~ TRUE))
#
# return(res)
# }
# tides <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
#
# if(is.null(sd_prec)){
# sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
# }
#
# result <- c(-Inf, Inf)
#
# aMax <- min_val
# aMin <- floor(mean*n_items)/n_items
# bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val)
# bMin <- min(aMin + 1/n_items, max_val)
# total <- round(mean * n_obs * n_items)/n_items
#
# poss_values <- max_val
# for(i in seq_len(n_items)){
# poss_values <- c(poss_values, min_val:(max_val-1) + (1 / n_items) * (i - 1))
# }
# poss_values <- sort(poss_values)
#
# for(abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))){
#
# a <- abm[1]
# b <- abm[2]
# m <- abm[3]
#
# # Adjust a and b to be within min_val and max_val
# a <- min(max(a, min_val), max_val)
# b <- min(max(b, min_val), max_val)
#
# if(a == b){
# vec <- rep(a, n_obs)
# } else {
# k <- round((total - (n_obs * b)) / (a - b))
# k <- min(max(k, 1), n_obs - 1)
# vec <- c(rep(a, k), rep(b, n_obs - k))
# diff <- sum(vec) - total
#
# if ((diff < 0)) {
# vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n_obs - k))
# } else if ((diff > 0)) {
# vec <- c(rep(a, k), b - diff, rep(b, n_obs - k - 1))
# }
# }
#
# # # Check if the calculated mean and values match expected conditions
# # if(round(mean(vec), sd_prec) == round(mean, sd_prec) &
# # all(floor(vec*10e9) %in% floor(poss_values*10e9))){
# # result[m] <- round(sd(vec), sd_prec)
# # }
#
# tolerance <- 1e-6 # Adjust as appropriate
#
# if(abs(mean(vec) - mean) < tolerance &
# all(floor(vec * 1e9) %in% floor(poss_values * 1e9))){
# result[m] <- round(sd(vec), sd_prec)
# }
#
# }
#
# # Replace Inf or -Inf with NA
# result[is.infinite(result)] <- NA
#
# # Create the data frame with the new columns
# res <- data.frame(
# sd_min = result[1],
# sd_max = result[2]
# ) |> mutate(
# tides_range_calculable = !is.na(sd_min) & !is.na(sd_max),
# tides_inside_ranges = ifelse(
# tides_range_calculable,
# sd >= sd_min & sd <= sd_max,
# FALSE
# ),
# tides = tides_range_calculable & tides_inside_ranges
# )
#
# return(res)
# }
# TIDES check: given reported summary statistics (n_obs, mean, sd) for a
# bounded scale [min_val, max_val] answered via n_items items (observed
# scores step by 1/n_items), reconstruct the smallest and largest standard
# deviations that are arithmetically possible for that mean, and test
# whether the reported sd lies inside that range.
#
# Arguments:
#   n_obs   - sample size.
#   mean    - reported mean (also used to infer precision when sd_prec is
#             NULL; that inference assumes a non-negative mean written in
#             plain decimal notation — TODO confirm for negative means).
#   sd      - reported standard deviation.
#   min_val - scale minimum.
#   max_val - scale maximum.
#   sd_prec - decimal places of the reported statistics (NULL = infer).
#   n_items - number of items averaged to form the score.
#
# Returns a one-row tibble: sd_min, sd_max, tides_sd_range_calculable,
# tides_inside_ranges, tides, pomp_m, pomp_sd.
tides <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
  # Infer the reporting precision from the decimal digits of `mean`.
  if (is.null(sd_prec)) {
    sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
  }
  # Custom rounding helpers: round half away from zero and round half toward
  # zero, so both common rounding conventions used in papers can be matched.
  round_half_up <- function(x, digits = 0) {
    posneg <- sign(x)
    z <- abs(x) * 10^digits + 0.5
    z <- trunc(z)
    z <- z / 10^digits
    z * posneg
  }
  round_half_down <- function(x, digits = 0) {
    posneg <- sign(x)
    z <- abs(x) * 10^digits - 0.5
    z <- trunc(z)
    z <- z / 10^digits
    z * posneg
  }
  # A candidate vector's mean "matches" the reported mean if the two agree
  # under either rounding convention.
  mean_matches <- function(vec_mean, target_mean, sd_prec) {
    rounded_means_vec <- c(round_half_up(vec_mean, sd_prec), round_half_down(vec_mean, sd_prec))
    rounded_means_target <- c(round_half_up(target_mean, sd_prec), round_half_down(target_mean, sd_prec))
    any(rounded_means_vec %in% rounded_means_target)
  }
  result <- c(-Inf, Inf)
  # Candidate two-point distributions: the SD-minimizing one concentrates
  # mass on the two grid values bracketing the mean (aMin, bMin); the
  # SD-maximizing one uses the scale endpoints (aMax, bMax).
  aMin <- floor(mean * n_items) / n_items
  aMax <- min_val
  bMin <- min(aMin + 1 / n_items, max_val)
  bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val)
  total <- round(mean * n_obs * n_items) / n_items
  # All values attainable on the response grid.
  poss_values <- seq(min_val, max_val, by = 1 / n_items)
  # Grid membership is tested at fixed precision (1e9) rather than with exact
  # floating-point equality: seq() increments of 1/n_items are not exactly
  # representable for most n_items.
  on_grid <- function(x) {
    floor(x * 1e9) %in% floor(poss_values * 1e9)
  }
  for (abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))) {
    a <- abm[1]
    b <- abm[2]
    m <- abm[3]
    # Adjust a and b to be within min_val and max_val.
    a <- min(max(a, min_val), max_val)
    b <- min(max(b, min_val), max_val)
    if (a == b) {
      vec <- rep(a, n_obs)
    } else {
      # Choose k so that k*a + (n_obs - k)*b is as close as possible to the
      # required total, then patch a single element to absorb the remainder.
      k <- round((total - (n_obs * b)) / (a - b))
      k <- min(max(k, 1), n_obs - 1)
      vec <- c(rep(a, k), rep(b, n_obs - k))
      diff <- sum(vec) - total
      if (diff < 0) {
        adjusted_value <- a + abs(diff)
        # BUGFIX: was `adjusted_value %in% poss_values`, an exact float
        # comparison that spuriously rejects valid values when n_items > 1;
        # now uses the same tolerance as the final grid check below.
        if (!any(on_grid(adjusted_value))) {
          next # Skip to next iteration if adjusted value is invalid
        }
        vec <- c(rep(a, k - 1), adjusted_value, rep(b, n_obs - k))
      } else if (diff > 0) {
        adjusted_value <- b - diff
        # BUGFIX: same tolerance-based grid test as above.
        if (!any(on_grid(adjusted_value))) {
          next # Skip to next iteration if adjusted value is invalid
        }
        vec <- c(rep(a, k), adjusted_value, rep(b, n_obs - k - 1))
      }
    }
    # Accept the candidate only if its mean reproduces the reported mean and
    # every element lies on the response grid.
    vec_mean <- mean(vec)
    if (mean_matches(vec_mean, mean, sd_prec) &
        all(on_grid(vec))) {
      result[m] <- round(sd(vec), sd_prec)
    }
  }
  # Replace Inf or -Inf with NA: no valid candidate vector was found.
  result[is.infinite(result)] <- NA
  # Assemble the output row, including POMP (percent of maximum possible)
  # scores for the mean and — where the SD range exists — for the sd.
  res <-
    tibble(sd_min = result[1],
           sd_max = result[2]) |>
    mutate(tides_sd_range_calculable = !is.na(sd_min) & !is.na(sd_max),
           tides_inside_ranges = ifelse(tides_sd_range_calculable,
                                        mean >= min_val & mean <= max_val & sd >= sd_min & sd <= sd_max,
                                        mean >= min_val & mean <= max_val),
           tides = tides_sd_range_calculable & tides_inside_ranges) |>
    # pomp scores
    mutate(pomp_m = (mean - min_val) / (max_val - min_val),
           pomp_sd = ifelse(!is.na(sd_min) & !is.na(sd_max), (sd - sd_min) / (sd_max - sd_min), NA))
  return(res)
}

HDRS-17 (17-item version): min 0, Max score of 52 (most common) HDRS-21 (21-item version): min 0, Max score of 64 HDRS-24 (24-item version): min 0, Max score of 74 HDRS-6 (6-item version): min 0, Max score of 22
PHQ-9: 0 to 27
ces-d: 0 to 60
Edinburgh Postnatal Depression Scale (EPDS): 0 to 30
Geriatric Depression Scale (GDS): - GDS-30 0 to 30 - GDS-15 0 to 15 * more common apparently, but worse fit to this data
Instrument usage counts in the dataset: bdi-1: 155; hdrs: 150; bdi-2: 138; phq-9: 98; ces-d: 78; epds: 46; gds: 36.
701 of 925 rows are covered by these scales.
Using all scales with 10+ uses would bring coverage to about 800 of 925 rows.
# Load the raw trial database and standardize it:
# - keep study/instrument identifiers plus per-arm summary statistics,
# - round means/SDs to 2 dp and ns to whole numbers,
# - harmonize instrument names so equivalent scales share one label.
# NOTE(review): the commented col_types block is kept for reference; column
# types are currently left to read_xlsx()'s guessing — confirm this is safe.
dat <- read_xlsx("../data/The depression database - psy vs ctr.xlsx") |>
# col_types = c("text", # study
# "text", # CHANGES
# "text", # condition_arm1
# "text", # condition_arm2
# "text", # multi_arm1
# "text", # multi_arm2
# "numeric", # .g
# "numeric", # .g_se
# "numeric", # primary_calc
# "text", # outcome_type
# "text", # instrument
# "text", # rating
# "text", # mean_arm1
# "text", # sd_arm1
# "text", # n_arm1
# "text", # mean_arm2
# "text", # sd_arm2
# "text", # n_arm2
# "text", # mean_change_arm1
# "text", # mean_change_arm2
# "text", # sd_change_arm1
# "text", # sd_change_arm2
# "text", # n_change_arm1
# "text", # n_change_arm2
# "text", # dich_paper
# "numeric", # event_arm1
# "numeric", # event_arm2
# "numeric", # totaln_arm1
# "numeric", # totaln_arm2
# "numeric", # precalc_g
# "numeric", # precalc_g_se
# "logical", # precalc_log_rr
# "logical", # precalc_log_rr_se
# "text", # other_stat
# "text", # baseline_m_arm1
# "text", # baseline_sd_arm1
# "text", # baseline_n_arm1
# "text", # baseline_m_arm2
# "text", # baseline_sd_arm2
# "text", # baseline_n_arm2
# "numeric", # rand_arm1
# "numeric", # rand_arm2
# "numeric", # attr_arm1
# "numeric", # attr_arm2
# "text", # rand_ratio
# "numeric", # year
# "text", # time_weeks
# "text", # time_weeks_needs_cleaning
# "text", # time
# "text", # comorbid_mental
# "text", # format
# "text", # format_details
# "text", # n_sessions_arm1
# "text", # country
# "text", # age_group
# "text", # mean_age
# "text", # percent_women
# "text", # recruitment
# "text", # diagnosis
# "text", # target_group
# "text", # sg
# "text", # ac
# "text", # ba
# "text", # itt
# "numeric", # rob
# "text", # adm
# "text", # notes
# "numeric", # outcome_rank
# "numeric", # primary
# "numeric", # .log_rr
# "numeric", # .log_rr_se
# "numeric", # .event_arm1
# "numeric", # .event_arm2
# "numeric", # .totaln_arm1
# "numeric", # .totaln_arm2
# "text", # full_ref
# "text", # doi
# "text", # abstract
# "text", # title
# "text", # url
# "text", # journal
# "text", # id_study
# "text")) |> # .id
janitor::clean_names() |>
# Keep and rename the columns used downstream.
# NOTE(review): `g` here is presumably the raw `.g` column after
# clean_names() strips the leading dot — verify against the sheet.
select(study, instrument, notes,
hedges_g = g,
mean_age, percent_women,
followup_m_1 = mean_arm1,
followup_sd_1 = sd_arm1,
followup_n_1 = n_arm1,
followup_m_2 = mean_arm2,
followup_sd_2 = sd_arm2,
followup_n_2 = n_arm2,
baseline_m_1 = baseline_m_arm1,
baseline_sd_1 = baseline_sd_arm1,
baseline_n_1 = baseline_n_arm1,
baseline_m_2 = baseline_m_arm2,
baseline_sd_2 = baseline_sd_arm2,
baseline_n_2 = baseline_n_arm2) |>
# round_half_up here resolves to janitor::round_half_up (janitor is
# attached at the top of the file).
mutate(across(contains("_m_") | contains("_sd_"), round_half_up, digits = 2),
across(contains("_n_"), round_half_up, digits = 0)) |>
# tidy up instrument names
mutate(instrument = case_when(
# for the sake of the below tests, bdi versions have the same scoring so can be treated equivalently
instrument == "bdi-1" ~ "bdi",
instrument == "bdi-2" ~ "bdi",
instrument == "bdi-21" ~ "bdi",
# seems like DASS hasn't been standardized. although there are 21 and 42 item versions, need to check these aren't apples with oranges *** TODO ***
instrument == "dass-d" ~ "dass-d",
instrument == "dass21-d" ~ "dass-d",
instrument == "dass" ~ "dass-d",
instrument == "DASS-21-D" ~ "dass-d",
# some misspellings of "hdrs" or use of alternative acronyms
instrument == "hrds-17" ~ "hdrs", # the modal length scale is the 17 item, so i recode this to hrds on the assumption that's the 17 item.
instrument == "hrsd-17" ~ "hdrs", # the modal length scale is the 17 item, so i recode this to hrds on the assumption that's the 17 item.
instrument == "hdrs-17" ~ "hdrs", # the modal length scale is the 17 item, so i recode this to hrds on the assumption that's the 17 item.
instrument == "ham-d-18" ~ "hdrs-18", # im not aware of an 18 item version
instrument == "hamd21" ~ "hdrs-21",
# ill assume that these two versions of the HADS are equivalent
instrument == "hads-d-severity" ~ "hads-d",
# combined qids versions that are likely the same
instrument == "qids-sr16" ~ "qids",
instrument == "qids16sr" ~ "qids",
# regardless of whether SR or clinican rated, these are scored the same and can be combined here
instrument == "qids-c" ~ "qids",
instrument == "qids-sr" ~ "qids",
# regardless of whether SR or clinican rated, these are scored the same and can be combined here
instrument == "madrs-s" ~ "madrs",
instrument == "madrs-sr" ~ "madrs",
instrument == "madrs-cr" ~ "madrs",
TRUE ~ instrument)
)
# Frequency of each (harmonized) instrument, most common first.
dat |>
#filter(str_detect(instrument, "^q")) |>
count(instrument) |>
#pull(instrument)
arrange(desc(n))## # A tibble: 58 × 2
## instrument n
## <chr> <int>
## 1 bdi 303
## 2 hdrs 159
## 3 phq-9 98
## 4 ces-d 78
## 5 epds 46
## 6 gds 36
## 7 madrs 26
## 8 hads-d 18
## 9 qids 17
## 10 mmpi-d 16
## # ℹ 48 more rows
# dat |>
# count(instrument) |>
# #pull(instrument)
# arrange(desc(n)) |>
# slice(1:10) |>
# summarize(total = sum(n))
# top 10 = 797 / 925 = 86% of N
# top 13 (those used at least 10 times) = 834 / 925 = 90% of N
# The ten most-used instruments (excluding mmpi-d, whose t-score scoring
# varies), captured as a character vector for later filtering.
top_ten_measures <- dat |>
filter(instrument != "mmpi-d") |> # this is apparently scored using t score but can vary, exclude for the moment
count(instrument) |>
#pull(instrument)
arrange(desc(n)) |>
slice(1:10) |>
pull(instrument) |>
dput()## c("bdi", "hdrs", "phq-9", "ces-d", "epds", "gds", "madrs", "hads-d",
## "qids", "scl")
# Long-format data for the GRIM/GRIMMER/TIDES checks: attach scale bounds
# for the top-ten instruments (NA bounds elsewhere), then reshape so each
# row is one arm x timepoint with columns m, sd, n; rows with any of those
# missing are dropped.
dat_for_grim <- dat |>
mutate(min_val = case_when(instrument %in% top_ten_measures ~ 0,
TRUE ~ NA),
max_val = case_when(instrument == "bdi" ~ 63,
instrument == "hdrs" ~ 52, # assuming 17 item version
instrument == "phq-9" ~ 27,
instrument == "ces-d" ~ 60,
instrument == "epds" ~ 30,
instrument == "gds" ~ 30, # assuming 30 item not 15 item version
instrument == "madrs" ~ 60,
instrument == "hads-d" ~ 21,
instrument == "qids" ~ 27, # assuming 16 item version
instrument == "scl" ~ 360, # assuming 90 item version not 25 or 27
TRUE ~ NA)) |>
#filter(str_detect(tolower(instrument), "bdi")) |>
#mutate(across(contains("_n_"), as.character)) |>
pivot_longer(cols = c(starts_with("followup"), starts_with("baseline")),
names_to = c("timepoint", ".value", "arm"),
names_sep = "_") |>
drop_na(m, sd, n)
# write_csv(dat_bdi, "../data/dat_bdi.csv")

# PHQ-9 rows only, reshaped to long format (one row per arm x timepoint),
# keeping rows with complete mean/sd/n.
# NOTE(review): in the flattened source this assignment had been fused onto
# the commented write_csv() line above, so dat_phq9 was never created and
# the pipeline below dangled; un-fused here.
dat_phq9 <- dat |>
  filter(instrument == "phq-9") |>
  mutate(min_val = 0,
         max_val = 27) |>
  pivot_longer(cols = c(starts_with("followup"), starts_with("baseline")),
               names_to = c("timepoint", ".value", "arm"),
               names_sep = "_") |>
  drop_na(m, sd, n)
# filter(sd <= (27-1)/2 & sd >= 0 & # exclude impossible values given the min min and max max SD
#        m >= 0 & m <= 27) # exclude impossible values given the min and max mean
# Distribution of reported SDs for PHQ-9.
ggplot(dat_phq9, aes(sd)) +
  geom_histogram(boundary = 0, binwidth = 0.25) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10)) # , limits = c(0, (27-1)/2)

# Distribution of reported means for PHQ-9.
# NOTE(review): this plot had been swallowed by the trailing comment above
# it in the flattened source; restored here.
ggplot(dat_phq9, aes(m)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10)) # , limits = c(0, 27)

# just follow up, between groups
ggplot(dat_phq9, aes(m)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10)) + # , limits = c(0, 27)
  facet_grid(arm ~ timepoint)

# Between-arm difference in follow-up means for PHQ-9, with its overall
# mean and SD attached for reference lines in the plot below.
dat_phq_diff <- dat |>
  filter(instrument == "phq-9") |>
  mutate(mean_diff_followup = followup_m_1 - followup_m_2,
         mean_mean_diff_followup = mean(mean_diff_followup, na.rm = TRUE),
         sd_mean_diff_followup = sd(mean_diff_followup, na.rm = TRUE))

ggplot(dat_phq_diff, aes(mean_diff_followup)) +
  geom_histogram(boundary = 0, binwidth = 0.5) +
  geom_vline(aes(xintercept = mean_mean_diff_followup)) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10))

# Between-arm mean differences at followup and baseline, all instruments.
dat_diff <- dat |>
  mutate(mean_diff_followup = followup_m_1 - followup_m_2,
         mean_diff_baseline = baseline_m_1 - baseline_m_2)
# Per-instrument summaries of the between-arm mean differences, restricted
# to instruments with at least 10 rows (NAs count toward n).
dat_diff_summary <- dat_diff |>
  group_by(instrument) |>
  summarize(mean_mean_diff_followup = mean(mean_diff_followup, na.rm = TRUE),
            median_mean_diff_followup = median(mean_diff_followup, na.rm = TRUE),
            sd_mean_diff_followup = sd(mean_diff_followup, na.rm = TRUE),
            mean_mean_diff_baseline = mean(mean_diff_baseline, na.rm = TRUE),
            median_mean_diff_baseline = median(mean_diff_baseline, na.rm = TRUE),
            sd_mean_diff_baseline = sd(mean_diff_baseline, na.rm = TRUE),
            n = n()) |>
  filter(n >= 10) # including NAs

# Keep only rows whose instrument survived the >= 10 filter above.
dat_reshaped_keep <- NULL # (no-op placeholder removed) -- see trimmed data below
dat_diff_trimmed <- dat_diff |>
  semi_join(dat_diff_summary, by = "instrument")

# Follow-up mean differences per instrument, with mean (blue) and median
# (red) reference lines.
ggplot(dat_diff_trimmed, aes(mean_diff_followup)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  geom_vline(data = dat_diff_summary, aes(xintercept = mean_mean_diff_followup), color = "blue", linetype = "dashed") +
  geom_vline(data = dat_diff_summary, aes(xintercept = median_mean_diff_followup), color = "red", linetype = "dashed") +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 5)) +
  facet_wrap(~ instrument, scale = "free")

# Notes on the follow-up plots (stray prose in the flattened source,
# preserved as comments):
# BDI issues:
# hrds issues:
# PHQ9:
# CES-D issues:

# Baseline mean differences per instrument, same reference lines.
ggplot(dat_diff_trimmed, aes(mean_diff_baseline)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  geom_vline(data = dat_diff_summary, aes(xintercept = mean_mean_diff_baseline), color = "blue", linetype = "dashed") +
  geom_vline(data = dat_diff_summary, aes(xintercept = median_mean_diff_baseline), color = "red", linetype = "dashed") +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 5)) +
  facet_wrap(~ instrument, scale = "free")

# Notes on the baseline plots (preserved as comments):
# BDI issues
# CES-D issues:
# PHQ-9 issues:
# Where the original study may have broken randomisation and controlled for
# baseline, ineffective interventions can be made to look effective.

# Baseline vs followup differences; points far from both axes deserve a look.
ggplot(dat_diff_trimmed, aes(mean_diff_baseline, mean_diff_followup)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_point() +
  facet_wrap(~ instrument, scale = "free")

# Studies inside the triangles to the left and right of the 0 have larger
# differences at baseline than followup and might be worth looking at.
# POMP (percent of maximum possible) rescaling of the per-arm means, then
# reduce to one between-arm POMP difference per study x timepoint.
dat_pomp <- dat |>
  mutate(min_val = case_when(instrument %in% top_ten_measures ~ 0,
                             TRUE ~ NA),
         max_val = case_when(instrument == "bdi" ~ 63,
                             instrument == "hdrs" ~ 52, # assuming 17 item version
                             instrument == "phq-9" ~ 27,
                             instrument == "ces-d" ~ 60,
                             instrument == "epds" ~ 30,
                             instrument == "gds" ~ 30, # assuming 30 item not 15 item version
                             instrument == "madrs" ~ 60,
                             instrument == "hads-d" ~ 21,
                             instrument == "qids" ~ 27, # assuming 16 item version
                             instrument == "scl" ~ 360, # assuming 90 item version not 25 or 27
                             TRUE ~ NA)) |>
  mutate(pomp_mean_1_followup = (followup_m_1 - min_val)/(max_val - min_val),
         pomp_mean_2_followup = (followup_m_2 - min_val)/(max_val - min_val),
         pomp_mean_1_baseline = (baseline_m_1 - min_val)/(max_val - min_val),
         pomp_mean_2_baseline = (baseline_m_2 - min_val)/(max_val - min_val)) |>
  pivot_longer(cols = starts_with("pomp"),
               names_to = "arm_timepoint",
               values_to = "pomp_mean") |>
  separate(arm_timepoint, into = c("scoring", "original_metric", "arm", "timepoint")) |>
  select(-scoring, -original_metric) |>
  drop_na(pomp_mean) |>
  pivot_wider(names_from = arm,
              names_prefix = "pomp_mean_condition_",
              values_from = pomp_mean) |>
  mutate(diff_pomp = pomp_mean_condition_1 - pomp_mean_condition_2) |>
  select(-pomp_mean_condition_1, -pomp_mean_condition_2) |>
  pivot_wider(names_from = timepoint,
              names_prefix = "pomp_mean_diff_",
              values_from = diff_pomp)

# Baseline vs followup POMP differences, colored by instrument.
ggplot(dat_pomp, aes(pomp_mean_diff_baseline, pomp_mean_diff_followup, color = instrument)) +
  geom_abline(slope = 0.5, intercept = 0, linetype = "dashed") +
  geom_abline(slope = -0.5, intercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_point()

# Same plot faceted by instrument.
# NOTE(review): this plot and the dat_reshaped assignment below had been
# fused onto the previous statements' closing lines in the flattened source.
ggplot(dat_pomp, aes(pomp_mean_diff_baseline, pomp_mean_diff_followup)) +
  geom_abline(slope = 0.5, intercept = 0, linetype = "dashed") +
  geom_abline(slope = -0.5, intercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_point() +
  facet_wrap(~ instrument, scale = "free")

# Long-format follow-up rows only, for inspecting reported SDs.
dat_reshaped <- dat |>
  pivot_longer(cols = c(starts_with("followup"), starts_with("baseline")),
               names_to = c("timepoint", ".value", "arm"),
               names_sep = "_") |>
  drop_na(m, sd, n) |>
  filter(timepoint == "followup")
# Per instrument x arm summaries of the reported follow-up SDs (instruments
# with at least 10 such rows).
dat_reshaped_summary <- dat_reshaped |>
  group_by(instrument, arm) |>
  summarize(mean_sd = mean(sd, na.rm = TRUE),
            median_sd = median(sd, na.rm = TRUE),
            sd_sd = sd(sd, na.rm = TRUE),
            n = n()) |>
  filter(n >= 10)

dat_reshaped_trimmed <- dat_reshaped |>
  semi_join(dat_reshaped_summary, by = "instrument")

# SD distributions per instrument x arm, with mean (blue) and median (red)
# reference lines.
ggplot(dat_reshaped_trimmed, aes(sd)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  geom_vline(data = dat_reshaped_summary, aes(xintercept = mean_sd), color = "blue", linetype = "dashed") +
  geom_vline(data = dat_reshaped_summary, aes(xintercept = median_sd), color = "red", linetype = "dashed") +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 5)) +
  facet_wrap(~ instrument + arm, scale = "free")

# Notes (stray prose in the flattened source, preserved as comments):
# BDI issues:
# PHQ9 issues:
# CES-D issues:
# Run the consistency checks row by row:
# - grim()/grimmer() (scrutiny) take the mean/sd as strings so the reported
#   precision is preserved;
# - tides() is wrapped in possibly() so a single failure yields NA instead
#   of aborting the whole pipeline; NA tides is then treated as a pass.
res <- dat_for_grim |>
mutate(m_prec = 2, # hard coded by rounding above for the moment
sd_prec = 2, # hard coded by rounding above for the moment
n_items = 1) |>
mutate(grim = pmap(list(as.character(m), n), grim)) |>
unnest(grim) |>
mutate(grimmer = pmap(list(as.character(m), as.character(sd), n), grimmer)) |>
unnest(grimmer) |>
mutate(tides = pmap(list(n, m, sd, min_val, max_val, sd_prec, n_items), possibly(tides, otherwise = NA))) |>
unnest(tides) |>
# master variable
mutate(tides = ifelse(is.na(tides), TRUE, tides)) |>
mutate(all_three = ifelse(grim + grimmer + tides < 3, FALSE, TRUE))
# res |>
# filter(timepoint == "followup") |>
# group_by(study) |>
# summarize(grim = as.logical(min(grim))) |>
# count(grim) |>
# kable(align = "r") |>
# kable_classic(full_width = FALSE)
#
# res |>
# filter(timepoint == "followup") |>
# group_by(study) |>
# summarize(grimmer = as.logical(min(grimmer))) |>
# count(grimmer) |>
# kable(align = "r") |>
# kable_classic(full_width = FALSE)
#
# res |>
# filter(timepoint == "followup") |>
# group_by(study) |>
# summarize(tides = as.logical(min(tides))) |>
# count(tides) |>
# kable(align = "r") |>
# kable_classic(full_width = FALSE)
#
# res |>
# filter(timepoint == "followup") |>
# group_by(study) |>
# summarize(grim = as.logical(min(grim)),
# grimmer = as.logical(min(grimmer)),
# tides = as.logical(min(tides))) |>
# count(grim, grimmer, tides) |>
# kable(align = "r") |>
# kable_classic(full_width = FALSE)
#
# res |>
# filter(timepoint == "followup") |>
# group_by(study) |>
# summarize(grim = as.logical(min(grim)),
# grimmer = as.logical(min(grimmer)),
# tides = as.logical(min(tides))) |>
# count() |>
# kable(align = "r") |>
# kable_classic(full_width = FALSE)
# Summary tables of the check results. round_half_up here resolves to
# janitor::round_half_up (janitor is attached at the top of the file).
# NOTE(review): in the flattened source each kable_classic() call was fused
# with its rendered markdown table; the tables are preserved below as
# comments.

# all tests crossed, all stats
res |>
  count(grim, grimmer, tides) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | grim | grimmer | tides | n | percent |
# |---|---|---|---|---|
# | FALSE | FALSE | FALSE | 103 | 3.1 |
# | FALSE | FALSE | TRUE | 336 | 10.2 |
# | TRUE | FALSE | FALSE | 1 | 0.0 |
# | TRUE | FALSE | TRUE | 106 | 3.2 |
# | TRUE | TRUE | FALSE | 247 | 7.5 |
# | TRUE | TRUE | TRUE | 2511 | 76.0 |

# all tests, all stats
res |>
  count(tides) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | tides | n | percent |
# |---|---|---|
# | FALSE | 351 | 10.6 |
# | TRUE | 2953 | 89.4 |

# tides range check, all stats
res |>
  count(tides_inside_ranges) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | tides_inside_ranges | n | percent |
# |---|---|---|
# | FALSE | 13 | 0.4 |
# | TRUE | 2847 | 86.2 |
# | NA | 444 | 13.4 |

# tides range check, RCT level
res |>
  group_by(study) |>
  summarize(tides_inside_ranges = as.logical(min(tides_inside_ranges))) |>
  count(tides_inside_ranges) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | tides_inside_ranges | n | percent |
# |---|---|---|
# | FALSE | 7 | 1.6 |
# | TRUE | 356 | 82.0 |
# | NA | 71 | 16.4 |

# grim x grimmer x tides-range (NA range treated as pass), RCT level
res |>
  mutate(tides_inside_ranges_binary = ifelse(is.na(tides_inside_ranges), TRUE, tides_inside_ranges)) |>
  group_by(study) |>
  summarize(grim = as.logical(min(grim)),
            grimmer = as.logical(min(grimmer)),
            tides_inside_ranges_binary = as.logical(min(tides_inside_ranges_binary))) |>
  count(grim, grimmer, tides_inside_ranges_binary) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | grim | grimmer | tides_inside_ranges_binary | n | percent |
# |---|---|---|---|---|
# | FALSE | FALSE | FALSE | 4 | 0.9 |
# | FALSE | FALSE | TRUE | 133 | 30.6 |
# | TRUE | FALSE | TRUE | 16 | 3.7 |
# | TRUE | TRUE | FALSE | 3 | 0.7 |
# | TRUE | TRUE | TRUE | 278 | 64.1 |

# tides, all stats
res |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | all_three | n | percent |
# |---|---|---|
# | FALSE | 793 | 24 |
# | TRUE | 2511 | 76 |

# all tests, follow up only
res |>
  filter(timepoint == "followup") |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | all_three | n | percent |
# |---|---|---|
# | FALSE | 456 | 27.3 |
# | TRUE | 1214 | 72.7 |

# all tests, all stats, article level
res |>
  group_by(study) |>
  summarize(all_three = as.logical(min(all_three))) |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | all_three | n | percent |
# |---|---|---|
# | FALSE | 217 | 50 |
# | TRUE | 217 | 50 |

# all tests, follow up only, article level
res |>
  filter(timepoint == "followup") |>
  group_by(study) |>
  summarize(all_three = as.logical(min(all_three))) |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
# | all_three | n | percent |
# |---|---|---|
# | FALSE | 189 | 44.1 |
# | TRUE | 240 | 55.9 |
# ggplot(res, aes(pomp_m, pomp_sd, color = all_tests)) +
# geom_point(shape = 15, size = 2, alpha = 0.8) +
# #geom_line() +
# theme_linedraw() +
# scale_color_viridis_d(begin = 0.3, end = 0.7) +
# xlab("POMP Mean") +
# ylab("POMP SD")

# Raw mean vs SD per instrument, colored by whether the reported SD lies
# inside its TIDES-reconstructed range.
# NOTE(review): the three plots below had been fused onto one another's
# closing lines in the flattened source; separated here.
res |>
  filter(!is.na(tides_inside_ranges)) |>
  ggplot(aes(m, sd, color = tides_inside_ranges)) +
  geom_point(alpha = 0.75) + # shape = 15, size = 2,
  #geom_line() +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  #xlim(0,1) + # could cut off impossible results!
  xlab("Mean") +
  ylab("SD") +
  facet_wrap(~ instrument, scales = "free")

# POMP mean vs POMP SD, all instruments pooled.
res |>
  filter(!is.na(tides_inside_ranges)) |>
  ggplot(aes(pomp_m, pomp_sd, color = tides_inside_ranges)) +
  geom_rect(aes(xmin = 0, xmax = 1, ymin = 0, ymax = 1), fill = "grey90", color = NA) +
  geom_point(alpha = 0.5) + # shape = 15, size = 2,
  #geom_line() +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  xlim(0,1) + # could cut off impossible results!
  xlab("POMP Mean") +
  ylab("POMP SD")

# Same plot faceted by instrument.
res |>
  filter(!is.na(tides_inside_ranges)) |>
  ggplot(aes(pomp_m, pomp_sd, color = tides_inside_ranges)) +
  geom_rect(aes(xmin = 0, xmax = 1, ymin = 0, ymax = 1), fill = "grey90", color = NA) +
  geom_point(alpha = 0.5) + # shape = 15, size = 2,
  #geom_line() +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  xlim(0,1) + # could cut off impossible results!
  xlab("POMP Mean") +
  ylab("POMP SD") +
  facet_wrap(~ instrument)